*Work from the project folder: the metadata files are read from, and the output is saved to, this directory.
cd "/Users/hausfath/Desktop/Climate Science/CRN/CRN HCN Project/"

*Maximum CRN-HCN separation in miles; used both as the pair filter and in the output file name.
global distance 150

*This file generates the station pairs used in the paper.
*It builds every pairing of a CRN station with an HCN station that lies within the search radius
*(global macro distance set above, currently 150 miles) and meets the selection criteria.

*Load the USHCN station metadata, keeping only the identifier, coordinates,
*record dates and the station-month count.
use ushcn_metadata.dta, clear
keep station_id lon lat start_date end_date count
rename count hcn_station_months
order station_id lon lat start_date end_date

*Tag every HCN column with an _hcn suffix so nothing collides with the CRN columns.
*Note that hcn_station_months therefore becomes hcn_station_months_hcn; later
*commands reach it through the unambiguous abbreviation hcn_station_months.
foreach hcn_var in station_id lon lat start_date end_date hcn_station_months {
	rename `hcn_var' `hcn_var'_hcn
}

*Park the HCN table in a temporary file so it can be crossed with the CRN table.
tempfile hcn_stations
save "`hcn_stations'", replace

*Load the USCRN station metadata, keeping only the identifier, coordinates and record dates.
use uscrn_metadata.dta, clear
keep station_id lon lat start_date end_date
order station_id lon lat start_date end_date

*Tag every CRN column with a _crn suffix so it stays distinct from the HCN columns.
foreach crn_var in station_id lon lat start_date end_date {
	rename `crn_var' `crn_var'_crn
}


*Pair every CRN station with every HCN station (full cross join of the two tables)
cross using "`hcn_stations'"

*Convert all four coordinates to radians, as required by the trigonometric functions
foreach network in crn hcn {
	foreach coord in lat lon {
		replace `coord'_`network' = `coord'_`network' * _pi/180
	}
}

*Great-circle distance between the two stations of each pair, in miles,
*from the spherical law of cosines (coordinates are already in radians).
local earth_radius_km 6371
local miles_per_km 0.621371192

*Rounding can push the cosine of the central angle a hair outside [-1, 1]
*for (nearly) co-located stations; acos() would then return missing and the
*pair would be silently lost by the distance filter, so clamp it first.
*Genuinely missing values (missing coordinates) are left missing.
tempvar cos_angle
gen double `cos_angle' = sin(lat_hcn)*sin(lat_crn) + 		///
               cos(lat_hcn)*cos(lat_crn) * 				///
               cos(lon_crn-lon_hcn)
replace `cos_angle' = 1 if `cos_angle' > 1 & !missing(`cos_angle')
replace `cos_angle' = -1 if `cos_angle' < -1

gen distance = acos(`cos_angle') * `earth_radius_km' * `miles_per_km'
drop `cos_angle'

*Remove pairs from the dataset if they fail the following criteria

*Stations must lie within the search radius (global macro distance, in miles).
*Pairs with a missing distance are removed here as well.
keep if distance <= $distance

*Both records must extend through at least June 2014 (monthly date 653).
*A missing end date sorts above every number and is therefore NOT dropped here.
drop if end_date_hcn < tm(2014m6)
drop if end_date_crn < tm(2014m6)

*Both records must begin no later than January 2006 (monthly date 552).
*A missing start date sorts above every number and is therefore dropped here.
drop if start_date_crn > tm(2006m1)
drop if start_date_hcn > tm(2006m1)

*Treat HCN records that begin before November 2000 (monthly date 490) as starting then.
*NOTE(review): presumably chosen to match the start of the CRN era - confirm.
replace start_date_hcn = tm(2000m11) if start_date_hcn < tm(2000m11)

*Require at least 96 months of data from 2004-present to weed out a few stations with missing data.
*The full variable name is used so this does not depend on variable-name abbreviation being enabled.
drop if hcn_station_months_hcn < 8 * 12

*Record length of each station in the pair, counting both the first and the last month
gen crn_record_length = end_date_crn - start_date_crn + 1
gen hcn_record_length = end_date_hcn - start_date_hcn + 1

*Keep only the pair identifiers and summary columns.
*The full name hcn_station_months_hcn is spelled out so this does not rely on abbreviation.
keep station_id_crn station_id_hcn crn_record_length hcn_record_length hcn_station_months_hcn distance

*Sort on both station ids: sorting on the CRN id alone leaves the many HCN partners of
*each CRN station in random order, so station_pair_id would change from run to run.
*The stable option keeps any remaining ties in their existing order.
sort station_id_crn station_id_hcn, stable
gen station_pair_id = _n

*The search radius is part of the file name so runs with different radii do not overwrite each other
save crn_hcn_station_pairs_$distance.dta, replace
